library(tidyverse)
library(skimr)
library(knitr)
library(kableExtra)
library(rvest)
library(reshape2)
library(gganimate)
library(magick)

SW <- readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/SW.csv')

le jeu des données

SW
## # A tibble: 6,201 x 6
##    gender event               medal  athlete        abb    year
##    <chr>  <chr>               <chr>  <chr>          <chr> <dbl>
##  1 Men    25 m Freestyle 1A   Gold   KENNY Mike     GBR    1980
##  2 Men    25 m Freestyle 1A   Silver KANTOLA Pekka  FIN    1980
##  3 Men    25 m Freestyle 1A   Bronze TIETZE H.      FRG    1980
##  4 Men    25 m Freestyle 1B   Gold   BURGER M.      CAN    1980
##  5 Men    25 m Freestyle 1B   Silver SLUPE G.       USA    1980
##  6 Men    25 m Freestyle 1B   Bronze MAKI Eero      FIN    1980
##  7 Men    25 m Freestyle 1C   Gold   SMYK Zbigniew  POL    1980
##  8 Men    25 m Freestyle 1C   Silver EMMEL Manfred  FRG    1980
##  9 Men    25 m Freestyle 1C   Bronze OCKVIRK Robert USA    1980
## 10 Men    50 m Freestyle CP C Gold   ADLER Kare     NOR    1980
## # ... with 6,191 more rows

Questions:

**1.Quel l’ordre des pays selon les nombre de medailles depuis le début des jeux ?

counter les medailles pour chaque pays

medal_count<- SW %>% filter(!is.na(medal))%>%
  group_by(abb, medal) %>%
  summarize(Count=length(medal)) 
medal_count
## # A tibble: 172 x 3
## # Groups:   abb [67]
##    abb   medal  Count
##    <chr> <chr>  <int>
##  1 ARG   Bronze     9
##  2 ARG   Gold       5
##  3 ARG   Silver    10
##  4 AUS   Bronze   160
##  5 AUS   Gold     147
##  6 AUS   Silver   158
##  7 AUT   Bronze     2
##  8 AUT   Gold       2
##  9 AUT   Silver     4
## 10 AZE   Gold       1
## # ... with 162 more rows

ordonner les pays par nombre de medailles

ord_med <- medal_count %>%
  group_by(abb) %>%
  summarize(Total=sum(Count)) %>%
  arrange(Total) %>%
  select(abb)
medal_count$abb <- factor(medal_count$abb, levels=ord_med$abb)
ord_med
## # A tibble: 67 x 1
##    abb  
##    <chr>
##  1 BAH  
##  2 BUL  
##  3 KAZ  
##  4 LTU  
##  5 MAR  
##  6 TTO  
##  7 VIE  
##  8 IPP  
##  9 SLO  
## 10 TCH  
## # ... with 57 more rows

le plot

ggplot(medal_count, aes(x=abb, y=Count, fill=medal)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values=c("gold1","gray70","gold4")) +
  ggtitle("les medailles de chaque pays dans l'histoire de la compéttition ") +
  theme(plot.title = element_text(hjust = 0.5))

**2.quel est le nombre de medailles par sex ?

ggplot(SW,aes(x= gender ,fill= medal))+
  geom_bar()+
  scale_fill_manual(values=c("gold1","gray70","gold4")) +
  ggtitle("nombre de medailles par sex ") +
  theme(plot.title = element_text(hjust = 0.5))

**3.quel est le nombre de medailles par sex pour chaque année ?

ggplot(SW,aes(x= gender ,fill= medal))+
  facet_wrap(~ year)+
  geom_bar()+
  scale_fill_manual(values=c("gold1","gray70","gold4")) +
  ggtitle("nombre de medailles par sex pour chaque année ") +
  theme(plot.title = element_text(hjust = 0.5))  

**4.quelle la distrubtion des medailles dans le monde en 1980 et 2016?
noc <- readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/noc_regions.csv')

noc = noc %>%
  rename(abb = NOC)
noc
## # A tibble: 230 x 3
##    abb   region      notes               
##    <chr> <chr>       <chr>               
##  1 AFG   Afghanistan <NA>                
##  2 AHO   Curacao     Netherlands Antilles
##  3 ALB   Albania     <NA>                
##  4 ALG   Algeria     <NA>                
##  5 AND   Andorra     <NA>                
##  6 ANG   Angola      <NA>                
##  7 ANT   Antigua     Antigua and Barbuda 
##  8 ANZ   Australia   Australasia         
##  9 ARG   Argentina   <NA>                
## 10 ARM   Armenia     <NA>                
## # ... with 220 more rows

Add regions to data and remove missing points

data_regions <- SW %>% 
  left_join(noc,by="abb") %>%
  filter(!is.na(region))

sous ensemble pour les jeux de 1980 et 2016,counter les athletes de chaque pays.

rio <- data_regions %>% 
  filter(year == "2016") %>%
  group_by(region) %>%
  summarize(Rio = length(unique(athlete)))

Arnhem_et_Veenendaal<- data_regions %>% 
  filter(year == "1980") %>%
  group_by(region) %>%
  summarize(Arnhem = length(unique(athlete)))

Create data for mapping

world <- map_data("world")
mapdat <- tibble(region=unique(world$region))
mapdat <- mapdat %>% 
  left_join(Arnhem_et_Veenendaal, by="region") %>%
  left_join(rio, by="region")
mapdat$Arnhem[is.na(mapdat$Arnhem)] <- 0
mapdat$Rio[is.na(mapdat$Rio)] <- 0
world <- left_join(world, mapdat, by="region")

Plot: Arnhem et Veenendaal 1980

ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Arnhem)) +
  labs(title = "Arnhem et Veenendaal  1980",
       x = NULL, y = NULL) +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.background = element_rect(fill = "navy"),
        plot.title = element_text(hjust = 0.5)) +
  guides(fill=guide_colourbar(title="Athletes")) +
  scale_fill_gradient2(low="white",high = "red")

Plot: Rio 2016

ggplot(world, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Rio)) +
  labs(title = "Rio 2016",
       x = NULL, y = NULL) +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        panel.background = element_rect(fill = "navy"),
        plot.title = element_text(hjust = 0.5)) +
  guides(fill=guide_colourbar(title="Athletes")) +
  scale_fill_gradient2(low="white",high = "red")

**5.Quels sont les pays qui n’ont pas de médaille d’or mais ils ont les autres ?

data_abb_medal <- dcast(medal_count, abb ~ medal)
data_abb_medal[is.na(data_abb_medal)] <- 0
data_abb_medal
##    abb Bronze Gold Silver
## 1  BAH      1    0      0
## 2  BUL      0    0      1
## 3  KAZ      0    1      0
## 4  LTU      0    0      1
## 5  MAR      1    0      0
## 6  TTO      1    0      0
## 7  VIE      0    0      1
## 8  IPP      0    1      1
## 9  SLO      2    0      0
## 10 TCH      1    0      1
## 11 KUW      2    0      1
## 12 LUX      0    1      2
## 13 CRO      4    0      0
## 14 CYP      1    2      1
## 15 HKG      3    1      0
## 16 JAM      0    1      3
## 17 CUB      2    1      2
## 18 PER      2    2      1
## 19 SGP      1    3      1
## 20 ZIM      3    0      2
## 21 AUT      2    2      4
## 22 AZE      0    1      7
## 23 THA      4    1      3
## 24 EGY      6    1      2
## 25 POR      6    0      3
## 26 SVK      3    2      4
## 27 EST      3    2      5
## 28 COL      4    2      5
## 29 SUI      8    0      4
## 30 UZB      6    2      4
## 31 EUN      6    4      3
## 32 FRO      5    1      7
## 33 KOR      6    7      2
## 34 YUG      9    3      6
## 35 URS      9    0     11
## 36 ARG      9    5     10
## 37 IRL      7    9      9
## 38 CZE     14   12      4
## 39 BEL     13    6     12
## 40 FIN     19    5     13
## 41 GRE     11   10     17
## 42 BLR      9   21     14
## 43 ISL     27   14      8
## 44 RSA     13   24     15
## 45 MEX     22   24     12
## 46 NZL     14   30     19
## 47 ITA     25   18     30
## 48 HUN     33   32     23
## 49 JPN     42   35     25
## 50 ISR     41   31     41
## 51 RUS     42   33     42
## 52 NOR     35   54     43
## 53 BRA     47   35     56
## 54 DEN     67   37     40
## 55 FRG     39   63     56
## 56 GER     61   67     76
## 57 UKR     78   78     65
## 58 SWE     54   95     94
## 59 POL     81   91     89
## 60 FRA     93  107    103
## 61 NED     94  117    103
## 62 CHN    107  150    119
## 63 ESP    140  118    122
## 64 CAN    118  165    119
## 65 AUS    160  147    158
## 66 USA    192  241    185
## 67 GBR    212  204    252
no_gold_data <- subset(data_abb_medal, Gold == 0 & Silver>0 & Bronze>0)
print("les pays qui n'ont pas de médaille d'or mais ils ont les autres")
## [1] "les pays qui n'ont pas de médaille d'or mais ils ont les autres"
no_gold_data$abb
## [1] TCH KUW ZIM POR SUI URS
## 67 Levels: BAH BUL KAZ LTU MAR TTO VIE IPP SLO TCH KUW LUX CRO CYP HKG ... GBR

**6.

all_medal_sex <- SW%>% group_by(abb, medal, gender) %>%
  summarise(total = n())
all_medal_sex.wide <- dcast(all_medal_sex, abb ~ medal+gender)
all_medal_sex.wide[is.na(all_medal_sex.wide)] <- 0
all_medal_sex.wide
##    abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 1  ARG          2            0            7        0          0          5
## 2  AUS         63            0           97       72          0         75
## 3  AUT          2            0            0        2          0          0
## 4  AZE          0            0            0        0          0          1
## 5  BAH          0            0            1        0          0          0
## 6  BEL          7            0            6        4          0          2
## 7  BLR          9            0            0       21          0          0
## 8  BRA         39            0            8       31          0          4
## 9  BUL          0            0            0        0          0          0
## 10 CAN         53            0           65       73          0         92
## 11 CHN         74            0           33      105          8         37
## 12 COL          4            0            0        2          0          0
## 13 CRO          4            0            0        0          0          0
## 14 CUB          2            0            0        1          0          0
## 15 CYP          0            0            1        0          0          2
## 16 CZE          8            0            6        5          0          7
## 17 DEN         43            0           24       28          0          9
## 18 EGY          6            0            0        1          0          0
## 19 ESP         80            0           60       69          0         49
## 20 EST          1            0            2        0          0          2
## 21 EUN          4            0            2        4          0          0
## 22 FIN          6            0           13        1          0          4
## 23 FRA         55            0           38       59          0         48
## 24 FRG         32            0            7       36          0         27
## 25 FRO          0            0            5        0          0          1
## 26 GBR         84            0          128      119          0         85
## 27 GER         23            0           38       25          0         42
## 28 GRE         10            0            1       10          0          0
## 29 HKG          2            0            1        1          0          0
## 30 HUN         22            0           11       25          0          7
## 31 IPP          0            0            0        1          0          0
## 32 IRL          6            0            1        6          0          3
## 33 ISL          8            0           19        6          0          8
## 34 ISR         32            0            9       25          0          6
## 35 ITA         21            0            4       14          0          4
## 36 JAM          0            0            0        0          0          1
## 37 JPN         33            0            9       12          0         23
## 38 KAZ          0            0            0        0          0          1
## 39 KOR          6            0            0        7          0          0
## 40 KUW          2            0            0        0          0          0
## 41 LTU          0            0            0        0          0          0
## 42 LUX          0            0            0        1          0          0
## 43 MAR          1            0            0        0          0          0
## 44 MEX         11            0           11       11          0         13
## 45 NED         57            0           37       71          0         46
## 46 NOR         17            0           18       34          0         20
## 47 NZL          9            0            5       11          0         19
## 48 PER          2            0            0        2          0          0
## 49 POL         56            0           25       60          0         31
## 50 POR          3            0            3        0          0          0
## 51 RSA         11            0            2       10          0         14
## 52 RUS         33            0            9       22          0         11
## 53 SGP          0            0            1        0          0          3
## 54 SLO          2            0            0        0          0          0
## 55 SUI          7            0            1        0          0          0
## 56 SVK          2            0            1        2          0          0
## 57 SWE         28            0           26       39          0         56
## 58 TCH          1            0            0        0          0          0
## 59 THA          4            0            0        1          0          0
## 60 TTO          0            0            1        0          0          0
## 61 UKR         52            7           19       62          0         16
## 62 URS          3            0            6        0          0          0
## 63 USA         84            0          108       89          0        152
## 64 UZB          3            0            3        1          0          1
## 65 VIE          0            0            0        0          0          0
## 66 YUG          8            0            1        3          0          0
## 67 ZIM          1            0            2        0          0          0
##    Silver_Men Silver_Mixed Silver_Women
## 1           3            0            7
## 2          78            0           80
## 3           4            0            0
## 4           3            0            4
## 5           0            0            0
## 6           7            0            5
## 7          14            0            0
## 8          44            6            6
## 9           0            0            1
## 10         50            0           69
## 11         99            0           20
## 12          5            0            0
## 13          0            0            0
## 14          2            0            0
## 15          0            0            1
## 16          0            0            4
## 17         27            0           13
## 18          2            0            0
## 19         69            0           53
## 20          0            0            5
## 21          2            0            1
## 22          5            0            8
## 23         51            0           52
## 24         41            0           15
## 25          0            0            7
## 26        119            0          133
## 27         25            0           51
## 28         15            0            2
## 29          0            0            0
## 30         15            0            8
## 31          1            0            0
## 32          4            0            5
## 33          4            0            4
## 34         35            0            6
## 35         23            0            7
## 36          0            0            3
## 37         21            0            4
## 38          0            0            0
## 39          2            0            0
## 40          1            0            0
## 41          1            0            0
## 42          2            0            0
## 43          0            0            0
## 44          6            0            6
## 45         57            0           46
## 46         26            0           17
## 47          8            0           11
## 48          1            0            0
## 49         63            0           26
## 50          0            0            3
## 51         13            0            2
## 52         32            0           10
## 53          0            0            1
## 54          0            0            0
## 55          4            0            0
## 56          0            0            4
## 57         44            0           50
## 58          1            0            0
## 59          3            0            0
## 60          0            0            0
## 61         41            0           24
## 62          9            0            2
## 63         62            0          123
## 64          1            0            3
## 65          1            0            0
## 66          6            0            0
## 67          0            0            2

**6a.Quel sont les pays où juste c’est les hommes qu’ont gagné la médaille d’or?

no_women_gold <- subset(all_medal_sex.wide, Gold_Women ==0 & Gold_Men>0 )
no_women_gold
##    abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 3  AUT          2            0            0        2          0          0
## 7  BLR          9            0            0       21          0          0
## 12 COL          4            0            0        2          0          0
## 14 CUB          2            0            0        1          0          0
## 18 EGY          6            0            0        1          0          0
## 21 EUN          4            0            2        4          0          0
## 28 GRE         10            0            1       10          0          0
## 29 HKG          2            0            1        1          0          0
## 31 IPP          0            0            0        1          0          0
## 39 KOR          6            0            0        7          0          0
## 42 LUX          0            0            0        1          0          0
## 48 PER          2            0            0        2          0          0
## 56 SVK          2            0            1        2          0          0
## 59 THA          4            0            0        1          0          0
## 66 YUG          8            0            1        3          0          0
##    Silver_Men Silver_Mixed Silver_Women
## 3           4            0            0
## 7          14            0            0
## 12          5            0            0
## 14          2            0            0
## 18          2            0            0
## 21          2            0            1
## 28         15            0            2
## 29          0            0            0
## 31          1            0            0
## 39          2            0            0
## 42          2            0            0
## 48          1            0            0
## 56          0            0            4
## 59          3            0            0
## 66          6            0            0
print("countries where women never won gold medal but men has")
## [1] "countries where women never won gold medal but men has"
no_women_gold$abb
##  [1] "AUT" "BLR" "COL" "CUB" "EGY" "EUN" "GRE" "HKG" "IPP" "KOR" "LUX" "PER"
## [13] "SVK" "THA" "YUG"

**6b.Quel sont les pays où les femmes qu’ont gagné la médaille d’or ?

no_men_gold <- subset(all_medal_sex.wide, Gold_Women>0 & Gold_Men==0 )
no_men_gold 
##    abb Bronze_Men Bronze_Mixed Bronze_Women Gold_Men Gold_Mixed Gold_Women
## 1  ARG          2            0            7        0          0          5
## 4  AZE          0            0            0        0          0          1
## 15 CYP          0            0            1        0          0          2
## 20 EST          1            0            2        0          0          2
## 25 FRO          0            0            5        0          0          1
## 36 JAM          0            0            0        0          0          1
## 38 KAZ          0            0            0        0          0          1
## 53 SGP          0            0            1        0          0          3
##    Silver_Men Silver_Mixed Silver_Women
## 1           3            0            7
## 4           3            0            4
## 15          0            0            1
## 20          0            0            5
## 25          0            0            7
## 36          0            0            3
## 38          0            0            0
## 53          0            0            1
print("countries where men never won gold medal but women has")
## [1] "countries where men never won gold medal but women has"
no_men_gold$abb
## [1] "ARG" "AZE" "CYP" "EST" "FRO" "JAM" "KAZ" "SGP"
La décomposition en continent :

**7.Combien chaque continent à de médailles ?

# medal_continent
continent <-readr::read_csv('https://raw.githubusercontent.com/youmrg/data_R/main/data.csv')
continent = continent %>%
  rename(abb = Three_Letter_Country_Code)

medal_continent <- SW %>%
left_join(continent,by="abb") %>%
  filter(!is.na(Continent_Name))

medal_continent<- medal_continent %>% filter(!is.na(medal))%>%
  group_by(year,Continent_Name) %>%
  summarize(Count=length(medal)) 
medal_continent
## # A tibble: 55 x 3
## # Groups:   year [10]
##     year Continent_Name Count
##    <dbl> <chr>          <int>
##  1  1980 Africa             1
##  2  1980 Asia              25
##  3  1980 Europe           238
##  4  1980 North America    119
##  5  1980 Oceania           13
##  6  1980 South America     13
##  7  1984 Africa             1
##  8  1984 Asia              38
##  9  1984 Europe           455
## 10  1984 North America    212
## # ... with 45 more rows
sum_medal_cont <- medal_continent %>%
  group_by(Continent_Name) %>%
  summarize(nombre_de_medailles=sum(Count))
 sum_medal_cont 
## # A tibble: 6 x 2
##   Continent_Name nombre_de_medailles
##   <chr>                        <int>
## 1 Africa                          10
## 2 Asia                           765
## 3 Europe                        2759
## 4 North America                 1088
## 5 Oceania                        528
## 6 South America                  178
pie_chart<- sum_medal_cont %>% 
  mutate(perc = `nombre_de_medailles` / sum(`nombre_de_medailles`)) %>% 
  arrange(perc) %>%
  mutate(labels = scales::percent(perc))
pie_chart
## # A tibble: 6 x 4
##   Continent_Name nombre_de_medailles    perc labels
##   <chr>                        <int>   <dbl> <chr> 
## 1 Africa                          10 0.00188 0.2%  
## 2 South America                  178 0.0334  3.3%  
## 3 Oceania                        528 0.0991  9.9%  
## 4 Asia                           765 0.144   14.4% 
## 5 North America                 1088 0.204   20.4% 
## 6 Europe                        2759 0.518   51.8%
ggplot(pie_chart, aes(x = "", y = perc, fill = Continent_Name)) +
  geom_col() +
  coord_polar(theta = "y")

le graphique animé :

WP3 <- ggplot(data = medal_continent, aes(x = year, y = Count, group=Continent_Name, color=Continent_Name)) +
  geom_line() +
  geom_point() +
  ggtitle("Nombre de médailles entre 1980 et 2016") +
  ylab("Nombre de médailles") +
  xlab("Année")+
  theme_classic()+
  view_follow(fixed_x = TRUE, 
              fixed_y = TRUE) +
  transition_reveal(year)
WP3 <- animate(WP3, end_pause = 15)

WP3

WP <- ggplot(data = medal_continent) +
  geom_col(mapping = aes(x = Continent_Name, y = Count), 
           fill = "darkcyan") +
  theme_classic() +
  xlab("Région") +
  ylab("Nombre de téléphones (en milliers)") +
  transition_states(year,
                    transition_length = 2,
                    state_length = 1, 
                    wrap = TRUE) +
  ggtitle("Année : {closest_state}")

WP

4. Number of France gold medals over the time

## `summarise()` has grouped output by 'year', 'abb'. You can override using the `.groups` argument.